import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the diabetes dataset that ships with scikit-learn.
diabetes = datasets.load_diabetes()

# Use a single feature (column index 2) as the regressor. Indexing with
# np.newaxis keeps the selection two-dimensional (one column) instead of
# collapsing it into a flat vector.
X = diabetes.data[:, np.newaxis, 2]
y = diabetes.target

# Print, one per line: the shape of the full data matrix, of the flat
# column, and of the single-column matrix actually used as X.
print(diabetes.data.shape, diabetes.data[:, 2].shape, X.shape, sep="\n")

# Split into training and testing subsets; the fixed random_state makes the
# split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)

# Build the linear regression estimator and fit it on the training data
# only (fit() returns the estimator itself, so the calls can be chained).
linear_regression = linear_model.LinearRegression().fit(X_train, y_train)

# Predictions of the fitted model on the training and the testing inputs.
y_train_predicted, y_test_predicted = (
    linear_regression.predict(X_train),
    linear_regression.predict(X_test),
)

# Mean squared error on the data the model was fitted on ...
train_MSD = mean_squared_error(y_true=y_train, y_pred=y_train_predicted)
# ... and on the held-out testing data.
test_MSD = mean_squared_error(y_true=y_test, y_pred=y_test_predicted)

# One figure with two side-by-side panels: training results on the left,
# testing results on the right.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 4))

# Left panel: training samples (orange dots) with the model's predictions
# drawn as a black line; the title reports the training error.
axs[0].set_title('Training set, MSD:{:.0f}'.format(train_MSD))
axs[0].scatter(X_train, y_train, color='orange')
axs[0].plot(X_train, y_train_predicted, color='black')

# Right panel: testing samples (gray dots) with the model's predictions
# drawn as a black line; the title reports the testing error.
axs[1].set_title('Testing set, MSD:{:.0f}'.format(test_MSD))
axs[1].scatter(X_test, y_test, color='gray')
axs[1].plot(X_test, y_test_predicted, color='black')

plt.show()